#FYI, this example will likely be updated over time!

library("survival") #package for standard survival analysis
library("forestplot") #package for making estimate `forest plot'
library("ggplot2") #package for making some additional plots
library("knitr") #package for knitting markdown files
library("SemiCompRisks") #package for semi competing risks analysis

#now, to install two packages from GitHub for updated semi-competing risks functions
#note, this only needs to be done once, so it is presently commented out

# library("devtools")
# install_github("harrisonreeder/SemiCompRisksFreq")
# install_github("harrisonreeder/SemiCompRisksPen")

library("SemiCompRisksPen") #package for penalized illness-death modeling
library("SemiCompRisksFreq") #package for frequentist illness-death modeling

#color blind friendly colors from here: https://davidmathlogic.com/colorblind/#%23648FFF-%23785EF0-%23DC267F-%23FE6100-%23FFB000
cb_blue <- "#648FFF"; cb_red <- "#DC267F"; cb_purple <- "#785EF0"; cb_orange <- "#FE6100"; cb_grey <- "#CACACA"

#pink and green color-blind friendly
four_color_paired <- RColorBrewer::brewer.pal(n=4,name="PiYG")[c(3,4,2,1)]
# four_color_paired <- RColorBrewer::brewer.pal(n=4,name="Paired")


#colors used in plots
two_color_cb <- c(cb_blue,cb_red)

three_color <- c("dodgerblue","firebrick3","purple3")
three_color_cb <- c(cb_blue,cb_red,cb_purple)

#take three cb colors and reduce V from 100 to 60
three_color_cb_dark <- c("#3b5699", "#751342", "#453589")


four_color <- c("lightgray","firebrick3","purple3","dodgerblue")
four_color_cb <- c(cb_grey,cb_red,cb_purple,cb_blue)
four_color_forest <- c("dodgerblue","firebrick3","purple3","magenta")
four_color_forest_cb <- c(cb_blue,cb_red,cb_purple,cb_orange)
five_color <- c("lightgray","firebrick3","magenta","purple3","dodgerblue")
five_color_cb <- c(cb_grey,cb_red,cb_orange,cb_purple,cb_blue)

# RColorBrewer::display.brewer.all(n=4,colorblindFriendly = TRUE)
# color-blind friendly categorical colors
three_color_qual <- RColorBrewer::brewer.pal(n=3,name="Set2")
four_color_qual <- RColorBrewer::brewer.pal(n=4,name="Dark2")
five_color_qual <- RColorBrewer::brewer.pal(n=5,name="Dark2")

Data Review

data("scrData") #load sample dataset from SemiCompRisks package

#for examples below, create a binary covariate
scrData$x1_bin <- as.numeric(scrData$x1>0)

#define "sojourn time"
scrData$sojourn <- scrData$time2 - scrData$time1

#define version of terminal flag that treats non-terminal event as censoring
scrData$event2_cr <- ifelse(scrData$event1==1, 0, scrData$event2)
#define version of event flag that is 0 for censoring, 1 for non-terminal, 2 for terminal
#for use when fitting strictly competing risks models
scrData$event_cr_num <- ifelse(scrData$event1==1, 1, ifelse(scrData$event2==1,2,0))
scrData$event_cr_fct <- factor(scrData$event_cr_num,levels=c("0","1","2"),
                               labels=c("cens","nonterm","term"))

outcome_vec <- numeric(NROW(scrData))
outcome_vec[scrData$event1==1 & scrData$event2==0] <- 1
outcome_vec[scrData$event1==0 & scrData$event2==1] <- 2
outcome_vec[scrData$event1==1 & scrData$event2==1] <- 3
scrData$outcome_cat <- factor(as.character(outcome_vec), levels=c("0","1","2","3"),
                              labels=c("neither","nonterm_only","term_only","both"))

#roughly choose quartiles of data
scrData$t1_cat <- as.factor(cut(scrData$time1,
                                breaks=c(0,.7,4.4,27,Inf),
                                right = FALSE,include.lowest = TRUE,labels=FALSE))
scrData <- cbind(scrData,
                 as.data.frame(model.matrix(~ 0 + t1_cat, scrData)))

# #rows corresponding to "first" transition
# temp_dat1 <- scrData
# temp_dat1$start_time <- 0
# temp_dat1$stop_time <- scrData$time1
# temp_dat1$event_msm1 <- scrData$event1
# temp_dat1$event_msm2 <- scrData$event_cr_num
# 
# #rows corresponding to "second" transition if applicable
# temp_dat2 <- scrData[scrData$event1==1,]
# temp_dat2$start_days <- temp_dat2$time1
# temp_dat2$stop_days <- temp_dat2$time2
# temp_dat2$pe_deliv_msm1 <- ifelse(temp_dat2$event2,2,0)
# temp_dat2$pe_deliv_msm2 <- ifelse(temp_dat2$event2,3,0)
# 
# #combine and sort data
# scrData_long <- rbind(temp_dat1,temp_dat2) %>%
#   arrange(hr_mrn_d,hr_mother_fisc_d,start_days) %>%
#   group_by(hr_mrn_d,hr_mother_fisc_d) %>%
#   mutate(id = cur_group_id(),
#          pe_deliv_msm1 = factor(pe_deliv_msm1,levels=c("0","1","2"),labels=c("cens","pe","deliv")),
#          pe_deliv_msm2 = factor(pe_deliv_msm2,levels=c("0","1","2","3"),labels=c("cens","pe","deliv_wo_pe","deliv_w_pe")),
#   )

Timing of Outcomes

#Empirical "Density" Plot
scat_plot <- ggplot(data=scrData[scrData$event1==1,],
                    mapping = aes(x=time1,
                                  y=time2, color=as.factor(event2))) +
  xlab("T1") +
  ylab("T2") + xlim(0,62) + ylim(0,62) +
  geom_point(alpha=0.25,size=1) + theme_classic() +
  geom_abline(slope=1) + 
  theme(legend.position = "bottom")
#histogram of terminal only times
hist_plot <- ggplot(data=scrData[scrData$event1==0,],
                    mapping = aes(x=time2,fill=as.factor(event2))) + 
  xlim(0,62) +
  geom_histogram(binwidth = 1,col="white",linewidth=0.25) +
  theme_classic() + coord_flip() + xlab("T2 without T1") + 
  ylab("Count")  +
  theme(legend.position = "bottom", legend.title = element_blank())
#combined "empirical joint density"
cowplot::plot_grid(scat_plot,hist_plot, nrow = 1, rel_widths = c(7,3))

#km curve for delivery overall
fit_km_term <- survfit(Surv(time2, event=event2) ~ 1, data=scrData)
plot(fit_km_term,fun="F",conf.int = TRUE)

#aalen-johansson curv es for CIF of preeclampsia and delivery competing risks
fit_aj_cr <- survfit(Surv(time1, event=event_cr_fct) ~ 1, data=scrData)
plot(fit_aj_cr,col = two_color_cb,lwd=2, conf.int=TRUE)

Analysis

Univariate Models

X1 (Binary)

form_h1 <- Formula::as.Formula("time1 + event1 ~ x1_bin")
form_h2 <- Formula::as.Formula("time1 + event2_cr ~ x1_bin")
#remember this should be used with subset of those with PE
form_h3 <- Formula::as.Formula("sojourn + event2 ~ x1_bin")

#list to store the univariate models
uni_freq_fit_list <- list()

#let's loop through every possible baseline
for(haz in c("wb", "bs", "pw","rp", NULL)){
  print(haz)
  #set number of baseline parameters depending on specification
  p0_temp <- if(haz=="wb") 2 else 4
  
  #knot locations will be set automatically

  #first, fit "cause-specific" hazard for non-terminal event
  uni_freq_fit_list[[paste0(haz,"_",1)]] <-
    SemiCompRisksFreq::FreqSurv_HReg2(Formula=form_h1,
      data = scrData, hazard = haz,
      p0 = p0_temp, optim_method = "BFGS")
  
  #second, fit "cause-specific" hazard for terminal event
  uni_freq_fit_list[[paste0(haz,"_",2)]] <-
    SemiCompRisksFreq::FreqSurv_HReg2(Formula=form_h2,
      data = scrData, hazard = haz,
      p0 = p0_temp, optim_method = "BFGS")
  
  #third, fit hazard for terminal event following non-terminal event
  #on 'sojourn time' scale, following semi-markov model structure
  uni_freq_fit_list[[paste0(haz,"_",3)]] <-
    SemiCompRisksFreq::FreqSurv_HReg2(Formula=form_h3,
      data = scrData[scrData$event1==1,],
      hazard = haz, p0 = p0_temp, optim_method = "BFGS")
}
## [1] "wb"
## [1] "bs"
## [1] "pw"
## [1] "rp"
#next, fit corresponding kaplan meier curves stratified by binary X1
uni_freq_fit_list[[paste0("km_",1)]] <- 
  survfit(Surv(time=time1, event=event1) ~ x1_bin,
                      data=scrData)
uni_freq_fit_list[[paste0("km_",2)]] <- 
  survfit(Surv(time=time1, event=event2_cr) ~ x1_bin,
                      data=scrData)
uni_freq_fit_list[[paste0("km_",3)]] <- 
  survfit(Surv(time=sojourn, event=event2) ~ x1_bin,
                      data=scrData[scrData$event1==1,])

#next, fit corresponding kaplan meier curves stratified by binary X1
uni_freq_fit_list[[paste0("cox_",1)]] <- 
  coxph(Surv(time=time1, event=event1) ~ x1_bin,
                      data=scrData)
uni_freq_fit_list[[paste0("cox_",2)]] <- 
  coxph(Surv(time=time1, event=event2_cr) ~ x1_bin,
                      data=scrData)
uni_freq_fit_list[[paste0("cox_",3)]] <- 
  coxph(Surv(time=sojourn, event=event2) ~ x1_bin,
                      data=scrData[scrData$event1==1,])
#function is very steep near zero
t_seq <- c(0.001,0.005,0.01,0.05,seq(from=1, to=60, by=0.1))

for(bl in c("wb","bs","pw","rp")){
  bl_label <- switch(bl, "wb"="Weibull", "bs"="B-Spline",
                         "pw"="Piecewise Constant","rp"="Royston-Parmar")
  for(haz in 1:2){
    pred0 <- SemiCompRisksFreq:::predict.Freq_HReg2(uni_freq_fit_list[[paste0(bl,"_",haz)]],
                     tseq=t_seq, xnew = as.matrix(c(0)))
    pred1 <- SemiCompRisksFreq:::predict.Freq_HReg2(uni_freq_fit_list[[paste0(bl,"_",haz)]],
                     tseq=t_seq, xnew = as.matrix(c(1)))

    plot(uni_freq_fit_list[[paste0("km_",haz)]],
         lwd=2, lty=1, col=four_color_paired[c(1,3)],
         xlab="Time",
         ylab=switch(haz,
                     "Survivor Function of Non-Terminal",
                     "Survivor Function of Terminal"))
    matplot(x = t_seq,
            y = cbind(pred0$S$S,pred1$S$S),
            type = "l", add=T,
            lwd=2, lty=1, col=four_color_paired[c(2,4)])
    legend(x = "bottomleft", fill=four_color_paired,
           legend = c("Kaplan-Meier, no X1",
                      paste0(bl_label,", no X1"),
                      "Kaplan-Meier, X1",
                      paste0(bl_label,", X1")) )
  }

  #h3 plotted on sojourn scale
  pred0 <- SemiCompRisksFreq:::predict.Freq_HReg2(uni_freq_fit_list[[paste0(bl,"_",3)]],
                   tseq=t_seq, xnew = as.matrix(c(0)))
  pred1 <- SemiCompRisksFreq:::predict.Freq_HReg2(uni_freq_fit_list[[paste0(bl,"_",3)]],
                   tseq=t_seq, xnew = as.matrix(c(1)))

  plot(uni_freq_fit_list[[paste0("km_",3)]],xlim=c(0,5),
       lwd=2, lty=1, col=four_color_paired[c(1,3)],
       xlab="Sojourn Time",
       ylab="Conditional Survivor Function of Terminal")
  matplot(x = t_seq,
          y= cbind(pred0$S$S,pred1$S$S),
          type = "l", add=T,
          lwd=2, lty=1, col=four_color_paired[c(2,4)])
  legend(x = "bottomleft", fill=four_color_paired,
         legend = c("Kaplan-Meier, no X1",
                    paste0(bl_label,", no X1"),
                    "Kaplan-Meier, X1",
                    paste0(bl_label,", X1")))
}

#get hazard ratios from every fit
exp(sapply(uni_freq_fit_list[c("wb_1","wb_2","wb_3",
                               "bs_1","bs_2","bs_3",
                               "pw_1","pw_2","pw_3",
                               "rp_1","rp_2","rp_3")],
       function(x) x$estimate["x1_bin"]))
## wb_1.x1_bin wb_2.x1_bin wb_3.x1_bin bs_1.x1_bin bs_2.x1_bin bs_3.x1_bin 
##    1.187546    1.635465    1.792493    1.183555    1.624836    1.796437 
## pw_1.x1_bin pw_2.x1_bin pw_3.x1_bin rp_1.x1_bin rp_2.x1_bin rp_3.x1_bin 
##    1.186683    1.632167    1.799576    1.175221    1.618369    1.787178
summary(uni_freq_fit_list$cox_1)$coefficient
##             coef exp(coef)   se(coef)        z    Pr(>|z|)
## x1_bin 0.1615185  1.175294 0.05502187 2.935533 0.003329753
summary(uni_freq_fit_list$cox_2)$coefficient
##             coef exp(coef)  se(coef)        z     Pr(>|z|)
## x1_bin 0.4805676  1.616992 0.1088953 4.413114 1.018942e-05
summary(uni_freq_fit_list$cox_3)$coefficient
##             coef exp(coef)   se(coef)        z     Pr(>|z|)
## x1_bin 0.5815168   1.78875 0.08034825 7.237455 4.571835e-13

Illness-Death Models

Once we’ve explored the data sufficiently, we may fit actual gamma-frailty illness death models. Here we focus on methods where each transition submodel has a proportional hazards specification.

form_temp <- Formula::Formula(time1 + event1 | time2 + event2 ~ x1 + x2 + x3 | x1 + x2 + x3 | x1 + x2 + x3)
form_temp_t1cat <- Formula::Formula(time1 + event1 | time2 + event2 ~ x1 + x2 + x3 | x1 + x2 + x3 | x1 + x2 + x3 + t1_cat2 + t1_cat3 + t1_cat4)

#pick best of two different optimization algorithms
opt_meth <- c("BFGS","L-BFGS",NULL)

std_freq_fit_list <- list()
for(haz in c("wb","pw","bs","rp",
             NULL)){
  for(frail in c("frail","nofrail",
                 NULL)){
    print(paste0(haz," ",frail))
    #set number of baseline parameters depending on specification
    nP0_temp <- if(haz=="wb") c(2,2,2) else nP0_temp <- c(4,4,4)
    
    print("markov")
    std_freq_fit_list[[paste0(haz,"_markov","_",frail)]] <-
      SemiCompRisksFreq::FreqID_HReg2(Formula=form_temp, data = scrData,
                                      hazard = haz, model = "Markov",
                                      nP0 = nP0_temp,
                                      frailty = (frail=="frail"), 
                                      optim_method = opt_meth,
                                      extra_starts = 0)
    print("not1")
    std_freq_fit_list[[paste0(haz,"_not1cat","_",frail)]] <-
      SemiCompRisksFreq::FreqID_HReg2(Formula=form_temp, 
                                      data = scrData,
                                      hazard = haz, model = "semi-Markov",
                                      nP0 = nP0_temp,
                                      frailty = (frail=="frail"), 
                                      optim_method = opt_meth,
                                      extra_starts = 0)
    print("t1cat")
    std_freq_fit_list[[paste0(haz,"_t1cat","_",frail)]] <-
      SemiCompRisksFreq::FreqID_HReg2(Formula=form_temp_t1cat, 
                                      data = scrData,
                                      hazard = haz, model = "semi-Markov",
                                      nP0 = nP0_temp,
                                      frailty = (frail=="frail"), 
                                      optim_method = opt_meth,
                                      extra_starts = 0)
  }
}
## [1] "wb frail"
## [1] "markov"
## [1] "not1"
## [1] "t1cat"
## [1] "wb nofrail"
## [1] "markov"
## [1] "not1"
## [1] "t1cat"
## [1] "pw frail"
## [1] "markov"
## [1] "not1"
## [1] "t1cat"
## [1] "pw nofrail"
## [1] "markov"
## [1] "not1"
## [1] "t1cat"
## [1] "bs frail"
## [1] "markov"
## [1] "not1"
## [1] "t1cat"
## [1] "bs nofrail"
## [1] "markov"
## [1] "not1"
## [1] "t1cat"
## [1] "rp frail"
## [1] "markov"
## [1] "not1"
## 
## [1] "t1cat"
## [1] "rp nofrail"
## [1] "markov"
## [1] "not1"
## 
## [1] "t1cat"
sapply(std_freq_fit_list,logLik)
##    wb_markov_frail   wb_not1cat_frail     wb_t1cat_frail  wb_markov_nofrail 
##          -8873.491          -8867.733          -8866.183          -6090.495 
## wb_not1cat_nofrail   wb_t1cat_nofrail    pw_markov_frail   pw_not1cat_frail 
##          -8943.991          -8923.057          -8961.295          -8931.915 
##     pw_t1cat_frail  pw_markov_nofrail pw_not1cat_nofrail   pw_t1cat_nofrail 
##          -8916.928          -9044.533          -9052.181          -9029.952 
##    bs_markov_frail   bs_not1cat_frail     bs_t1cat_frail  bs_markov_nofrail 
##          -9016.994          -8984.230          -8943.057          -9097.578 
## bs_not1cat_nofrail   bs_t1cat_nofrail    rp_markov_frail   rp_not1cat_frail 
##          -9113.154          -9091.660          -8867.340          -8860.779 
##     rp_t1cat_frail  rp_markov_nofrail rp_not1cat_nofrail   rp_t1cat_nofrail 
##          -8859.819          -8878.983          -8912.261          -8889.829
for(haz in c("wb","pw","bs","rp",
             NULL)){
  for(frail in c("frail",
                 "nofrail",
                 NULL)){
    print(paste0(haz," ",frail))
    temp_fit <- std_freq_fit_list[[paste0(haz,"_t1cat","_",frail)]]
    print(temp_fit)
    print(summary(temp_fit))
    pred_temp_fit <- SemiCompRisksFreq:::predict.Freq_HReg2(temp_fit)
    # print(pred_temp_fit)
    plot(pred_temp_fit)
  }
}
## [1] "wb frail"
## 
## Analysis of independent semi-competing risks data 
## Weibull baseline hazard specification
## semi-Markov specification for h3
## Confidence level: 95%
## 
## Variance of frailties, theta:
##   theta      SE      LL      UL    test  pvalue 
##   0.776   0.098   0.607   0.993 113.747   0.000 
## SE computed from SE(log(theta)) via delta method.
## Bounds formed for log(theta) and exponentiated.
## Likelihood ratio of theta=0 vs. theta>0 using mixture of chi-squareds null.
## 
## Regression coefficients:
##           beta    SE     LL     UL       z pvalue
## x1       0.244 0.039  0.168  0.320   6.289  0.000
## x2       0.617 0.044  0.531  0.703  14.062  0.000
## x3      -0.313 0.040 -0.391 -0.234  -7.835  0.000
## x1       0.432 0.063  0.310  0.555   6.905  0.000
## x2       0.748 0.069  0.613  0.883  10.880  0.000
## x3      -0.400 0.064 -0.525 -0.275  -6.281  0.000
## x1       0.712 0.057  0.600  0.824  12.473  0.000
## x2       0.906 0.066  0.776  1.035  13.709  0.000
## x3      -0.891 0.059 -1.007 -0.774 -15.027  0.000
## t1_cat2 -0.049 0.123 -0.289  0.191  -0.399  0.690
## t1_cat3  0.160 0.159 -0.152  0.473   1.004  0.315
## t1_cat4  0.327 0.291 -0.244  0.898   1.124  0.261
## 
## Note: Covariates are arranged in order of transition number, 1->3.
## 
## Analysis of independent semi-competing risks data 
## Weibull baseline hazard specification
## semi-Markov specification for h3
## Confidence level: 95%
## 
## Hazard ratios:
##         exp(beta1)    LL    UL exp(beta2)    LL    UL exp(beta3)    LL    UL
## x1           1.276 1.183 1.377      1.541 1.363 1.742      2.038 1.823 2.280
## x2           1.854 1.701 2.021      2.113 1.847 2.418      2.474 2.173 2.816
## x3           0.732 0.677 0.791      0.670 0.591 0.759      0.410 0.365 0.461
## t1_cat2         NA    NA    NA         NA    NA    NA      0.952 0.749 1.211
## t1_cat3         NA    NA    NA         NA    NA    NA      1.174 0.859 1.604
## t1_cat4         NA    NA    NA         NA    NA    NA      1.387 0.784 2.455
## 
## Variance of frailties:
##   theta      SE      LL      UL    test  pvalue 
##   0.776   0.098   0.607   0.993 113.747   0.000 
## SE computed from log(theta) via delta method. Bounds exponentiated from log(theta).
## Likelihood ratio test of theta=0 vs. theta>0 using mixture of chi-squareds null.
## 
## Baseline hazard function components:
##                     h1-PM    SE     LL     UL  h2-PM    SE     LL     UL  h3-PM
## Weibull: log-kappa -1.162 0.049 -1.258 -1.066 -2.892 0.093 -3.074 -2.709 -3.185
## Weibull: log-alpha -0.516 0.036 -0.586 -0.445 -0.283 0.047 -0.376 -0.191 -0.397
##                       SE     LL     UL
## Weibull: log-kappa 0.125 -3.430 -2.940
## Weibull: log-alpha 0.034 -0.464 -0.329

## [1] "wb nofrail"
## 
## Analysis of independent semi-competing risks data 
## Weibull baseline hazard specification
## semi-Markov specification for h3
## Confidence level: 95%
## 
## Non-frailty model fit
## 
## Regression coefficients:
##           beta    SE     LL     UL       z pvalue
## x1       0.164 0.028  0.109  0.218   5.872      0
## x2       0.417 0.028  0.362  0.473  14.710      0
## x3      -0.214 0.028 -0.269 -0.159  -7.598      0
## x1       0.334 0.055  0.227  0.441   6.124      0
## x2       0.503 0.056  0.393  0.613   8.955      0
## x3      -0.279 0.055 -0.388 -0.171  -5.043      0
## x1       0.503 0.044  0.417  0.588  11.502      0
## x2       0.546 0.043  0.461  0.632  12.575      0
## x3      -0.649 0.044 -0.735 -0.562 -14.701      0
## t1_cat2 -0.330 0.093 -0.511 -0.148  -3.556      0
## t1_cat3 -0.559 0.107 -0.770 -0.349  -5.206      0
## t1_cat4 -0.946 0.233 -1.404 -0.489  -4.057      0
## 
## Note: Covariates are arranged in order of transition number, 1->3.
## 
## Analysis of independent semi-competing risks data 
## Weibull baseline hazard specification
## semi-Markov specification for h3
## Confidence level: 95%
## 
## Hazard ratios:
##         exp(beta1)    LL    UL exp(beta2)    LL    UL exp(beta3)    LL    UL
## x1           1.178 1.115 1.244      1.396 1.255 1.554      1.653 1.517 1.801
## x2           1.518 1.436 1.605      1.654 1.481 1.846      1.727 1.586 1.881
## x3           0.807 0.764 0.853      0.756 0.679 0.843      0.523 0.479 0.570
## t1_cat2         NA    NA    NA         NA    NA    NA      0.719 0.600 0.862
## t1_cat3         NA    NA    NA         NA    NA    NA      0.572 0.463 0.706
## t1_cat4         NA    NA    NA         NA    NA    NA      0.388 0.246 0.613
## 
## Non-frailty model fit
## 
## Baseline hazard function components:
##                     h1-PM    SE     LL     UL  h2-PM    SE     LL     UL  h3-PM
## Weibull: log-kappa -1.324 0.039 -1.401 -1.248 -3.003 0.089 -3.178 -2.829 -2.493
## Weibull: log-alpha -0.801 0.023 -0.846 -0.757 -0.578 0.042 -0.661 -0.495 -0.552
##                       SE     LL     UL
## Weibull: log-kappa 0.096 -2.682 -2.304
## Weibull: log-alpha 0.033 -0.617 -0.487

## [1] "pw frail"
## 
## Analysis of independent semi-competing risks data 
## Piecewise Constant baseline hazard specification
## semi-Markov specification for h3
## Confidence level: 95%
## 
## Variance of frailties, theta:
##   theta      SE      LL      UL    test  pvalue 
##   1.566   0.148   1.302   1.884 226.048   0.000 
## SE computed from SE(log(theta)) via delta method.
## Bounds formed for log(theta) and exponentiated.
## Likelihood ratio of theta=0 vs. theta>0 using mixture of chi-squareds null.
## 
## Regression coefficients:
##           beta    SE     LL     UL       z pvalue
## x1       0.320 0.048  0.226  0.414   6.671  0.000
## x2       0.803 0.053  0.698  0.908  15.007  0.000
## x3      -0.405 0.050 -0.502 -0.308  -8.162  0.000
## x1       0.510 0.069  0.374  0.645   7.384  0.000
## x2       0.947 0.075  0.799  1.095  12.549  0.000
## x3      -0.498 0.071 -0.637 -0.359  -7.032  0.000
## x1       0.854 0.064  0.728  0.979  13.312  0.000
## x2       1.162 0.074  1.016  1.308  15.611  0.000
## x3      -1.056 0.067 -1.188 -0.925 -15.761  0.000
## t1_cat2  0.290 0.139  0.018  0.561   2.090  0.037
## t1_cat3  0.881 0.189  0.510  1.252   4.657  0.000
## t1_cat4  1.509 0.335  0.854  2.165   4.511  0.000
## 
## Note: Covariates are arranged in order of transition number, 1->3.
## 
## Analysis of independent semi-competing risks data 
## Piecewise Constant baseline hazard specification
## semi-Markov specification for h3
## Confidence level: 95%
## 
## Hazard ratios:
##         exp(beta1)    LL    UL exp(beta2)    LL    UL exp(beta3)    LL    UL
## x1           1.377 1.254 1.513      1.665 1.454 1.906      2.348 2.071 2.663
## x2           2.232 2.009 2.478      2.579 2.224 2.990      3.197 2.763 3.700
## x3           0.667 0.605 0.735      0.608 0.529 0.698      0.348 0.305 0.397
## t1_cat2         NA    NA    NA         NA    NA    NA      1.336 1.018 1.753
## t1_cat3         NA    NA    NA         NA    NA    NA      2.413 1.666 3.497
## t1_cat4         NA    NA    NA         NA    NA    NA      4.524 2.348 8.715
## 
## Variance of frailties:
##   theta      SE      LL      UL    test  pvalue 
##   1.566   0.148   1.302   1.884 226.048   0.000 
## SE computed from log(theta) via delta method. Bounds exponentiated from log(theta).
## Likelihood ratio test of theta=0 vs. theta>0 using mixture of chi-squareds null.
## 
## Baseline hazard function components:
##                           h1-PM    SE     LL     UL  h2-PM    SE     LL     UL
## Piecewise Constant: phi1 -0.548 0.068 -0.681 -0.415 -2.648 0.122 -2.887 -2.409
## Piecewise Constant: phi2 -1.378 0.088 -1.550 -1.207 -3.005 0.140 -3.279 -2.730
## Piecewise Constant: phi3 -1.782 0.126 -2.030 -1.535 -2.890 0.172 -3.227 -2.553
## Piecewise Constant: phi4 -1.830 0.194 -2.211 -1.449 -2.868 0.225 -3.309 -2.427
##                           h3-PM    SE     LL     UL
## Piecewise Constant: phi1 -3.738 0.129 -3.992 -3.485
## Piecewise Constant: phi2 -4.056 0.122 -4.295 -3.816
## Piecewise Constant: phi3 -4.654 0.120 -4.889 -4.419
## Piecewise Constant: phi4 -5.027 0.119 -5.260 -4.794
## 
## Knots:
##           h1     h2     h3
## knot1  0.000  0.000  0.000
## knot2  0.342  0.853  1.836
## knot3  2.137  4.311  6.299
## knot4 10.007 13.669 20.241

## [1] "pw nofrail"
## 
## Analysis of independent semi-competing risks data 
## Piecewise Constant baseline hazard specification
## semi-Markov specification for h3
## Confidence level: 95%
## 
## Non-frailty model fit
## 
## Regression coefficients:
##           beta    SE     LL     UL       z pvalue
## x1       0.167 0.028  0.112  0.222   5.978      0
## x2       0.421 0.029  0.365  0.477  14.775      0
## x3      -0.215 0.028 -0.270 -0.160  -7.647      0
## x1       0.336 0.055  0.229  0.442   6.148      0
## x2       0.506 0.056  0.396  0.617   9.001      0
## x3      -0.280 0.055 -0.389 -0.172  -5.063      0
## x1       0.510 0.044  0.425  0.596  11.678      0
## x2       0.550 0.044  0.465  0.636  12.640      0
## x3      -0.652 0.044 -0.739 -0.565 -14.737      0
## t1_cat2 -0.334 0.093 -0.516 -0.153  -3.608      0
## t1_cat3 -0.566 0.107 -0.776 -0.356  -5.277      0
## t1_cat4 -0.999 0.234 -1.457 -0.541  -4.272      0
## 
## Note: Covariates are arranged in order of transition number, 1->3.
## 
## Analysis of independent semi-competing risks data 
## Piecewise Constant baseline hazard specification
## semi-Markov specification for h3
## Confidence level: 95%
## 
## Hazard ratios:
##         exp(beta1)    LL    UL exp(beta2)    LL    UL exp(beta3)    LL    UL
## x1           1.182 1.119 1.248      1.399 1.257 1.557      1.666 1.529 1.815
## x2           1.524 1.441 1.611      1.659 1.486 1.853      1.734 1.592 1.888
## x3           0.806 0.763 0.852      0.756 0.678 0.842      0.521 0.478 0.568
## t1_cat2         NA    NA    NA         NA    NA    NA      0.716 0.597 0.858
## t1_cat3         NA    NA    NA         NA    NA    NA      0.568 0.460 0.701
## t1_cat4         NA    NA    NA         NA    NA    NA      0.368 0.233 0.582
## 
## Non-frailty model fit
## 
## Baseline hazard function components:
##                           h1-PM    SE     LL     UL  h2-PM    SE     LL     UL
## Piecewise Constant: phi1 -0.655 0.056 -0.765 -0.545 -2.862 0.110 -3.078 -2.646
## Piecewise Constant: phi2 -1.993 0.055 -2.102 -1.885 -3.857 0.109 -4.071 -3.642
## Piecewise Constant: phi3 -3.015 0.055 -3.123 -2.907 -4.375 0.108 -4.587 -4.163
## Piecewise Constant: phi4 -4.041 0.055 -4.150 -3.933 -5.229 0.108 -5.440 -5.018
##                           h3-PM    SE     LL     UL
## Piecewise Constant: phi1 -2.733 0.100 -2.929 -2.538
## Piecewise Constant: phi2 -3.327 0.098 -3.519 -3.136
## Piecewise Constant: phi3 -4.130 0.096 -4.318 -3.941
## Piecewise Constant: phi4 -4.699 0.094 -4.883 -4.514
## 
## Knots:
##           h1     h2     h3
## knot1  0.000  0.000  0.000
## knot2  0.342  0.853  1.836
## knot3  2.137  4.311  6.299
## knot4 10.007 13.669 20.241

## [1] "bs frail"
## 
## Analysis of independent semi-competing risks data 
## B-Spline baseline hazard specification
## semi-Markov specification for h3
## Confidence level: 95%
## 
## Variance of frailties, theta:
##   theta      SE      LL      UL    test  pvalue 
##   2.403   0.219   2.010   2.873 297.207   0.000 
## SE computed from SE(log(theta)) via delta method.
## Bounds formed for log(theta) and exponentiated.
## Likelihood ratio of theta=0 vs. theta>0 using mixture of chi-squareds null.
## 
## Regression coefficients:
##           beta    SE     LL     UL       z pvalue
## x1       0.387 0.057  0.275  0.498   6.777      0
## x2       0.963 0.061  0.844  1.082  15.850      0
## x3      -0.487 0.059 -0.603 -0.372  -8.292      0
## x1       0.569 0.076  0.421  0.717   7.533      0
## x2       1.099 0.081  0.941  1.256  13.645      0
## x3      -0.575 0.078 -0.727 -0.423  -7.402      0
## x1       0.933 0.070  0.796  1.071  13.320      0
## x2       1.354 0.081  1.196  1.513  16.784      0
## x3      -1.164 0.074 -1.308 -1.020 -15.825      0
## t1_cat2  0.781 0.152  0.483  1.078   5.137      0
## t1_cat3  1.782 0.217  1.358  2.207   8.225      0
## t1_cat4  2.731 0.392  1.962  3.500   6.959      0
## 
## Note: Covariates are arranged in order of transition number, 1->3.
## 
## Analysis of independent semi-competing risks data 
## B-Spline baseline hazard specification
## semi-Markov specification for h3
## Confidence level: 95%
## 
## Hazard ratios:
##         exp(beta1)    LL    UL exp(beta2)    LL    UL exp(beta3)    LL     UL
## x1           1.472 1.316 1.646      1.767 1.523 2.048      2.543 2.217  2.918
## x2           2.620 2.326 2.951      3.000 2.562 3.513      3.875 3.308  4.539
## x3           0.614 0.547 0.689      0.563 0.483 0.655      0.312 0.270  0.361
## t1_cat2         NA    NA    NA         NA    NA    NA      2.183 1.620  2.940
## t1_cat3         NA    NA    NA         NA    NA    NA      5.945 3.887  9.091
## t1_cat4         NA    NA    NA         NA    NA    NA     15.345 7.111 33.112
## 
## Variance of frailties:
##   theta      SE      LL      UL    test  pvalue 
##   2.403   0.219   2.010   2.873 297.207   0.000 
## SE computed from log(theta) via delta method. Bounds exponentiated from log(theta).
## Likelihood ratio test of theta=0 vs. theta>0 using mixture of chi-squareds null.
## 
## Baseline hazard function components:
##                 h1-PM    SE     LL     UL  h2-PM    SE     LL     UL  h3-PM
## B-Spline: phi1 -0.756 0.078 -0.909 -0.604 -2.556 0.119 -2.788 -2.323 -3.933
## B-Spline: phi2 -2.145 0.539 -3.202 -1.089 -1.143 0.635 -2.389  0.102 -6.145
## B-Spline: phi3  0.661 0.479 -0.278  1.599 -2.601 0.777 -4.125 -1.078 -5.315
## B-Spline: phi4 -0.681 0.529 -1.718  0.356 -1.027 0.640 -2.281  0.228 -4.704
##                   SE     LL     UL
## B-Spline: phi1 0.127 -4.182 -3.683
## B-Spline: phi2 0.345 -6.820 -5.469
## B-Spline: phi3 0.484 -6.263 -4.366
## B-Spline: phi4 0.315 -5.321 -4.087
## 
## Knots:
##       h1 h2 h3
## knot1  0  0  0
## knot2 60 60 60

## [1] "bs nofrail"
## 
## Analysis of independent semi-competing risks data 
## B-Spline baseline hazard specification
## semi-Markov specification for h3
## Confidence level: 95%
## 
## Non-frailty model fit
## 
## Regression coefficients:
##           beta    SE     LL     UL       z pvalue
## x1       0.166 0.028  0.111  0.221   5.944      0
## x2       0.423 0.029  0.367  0.479  14.816      0
## x3      -0.215 0.028 -0.270 -0.160  -7.652      0
## x1       0.330 0.055  0.223  0.437   6.056      0
## x2       0.500 0.056  0.390  0.610   8.900      0
## x3      -0.278 0.055 -0.386 -0.169  -5.023      0
## x1       0.507 0.044  0.421  0.592  11.613      0
## x2       0.547 0.044  0.462  0.632  12.560      0
## x3      -0.646 0.044 -0.733 -0.560 -14.624      0
## t1_cat2 -0.329 0.093 -0.510 -0.147  -3.549      0
## t1_cat3 -0.552 0.108 -0.763 -0.340  -5.119      0
## t1_cat4 -1.001 0.234 -1.460 -0.543  -4.280      0
## 
## Note: Covariates are arranged in order of transition number, 1->3.
## 
## Analysis of independent semi-competing risks data 
## B-Spline baseline hazard specification
## semi-Markov specification for h3
## Confidence level: 95%
## 
## Hazard ratios:
##         exp(beta1)    LL    UL exp(beta2)    LL    UL exp(beta3)    LL    UL
## x1           1.181 1.118 1.247      1.391 1.250 1.548      1.660 1.524 1.808
## x2           1.526 1.443 1.614      1.648 1.477 1.840      1.728 1.587 1.882
## x3           0.807 0.764 0.852      0.758 0.680 0.844      0.524 0.481 0.571
## t1_cat2         NA    NA    NA         NA    NA    NA      0.720 0.600 0.863
## t1_cat3         NA    NA    NA         NA    NA    NA      0.576 0.466 0.712
## t1_cat4         NA    NA    NA         NA    NA    NA      0.367 0.232 0.581
## 
## Non-frailty model fit
## 
## Baseline hazard function components:
##                 h1-PM    SE     LL     UL  h2-PM    SE     LL     UL  h3-PM
## B-Spline: phi1 -1.328 0.045 -1.416 -1.239 -3.140 0.099 -3.333 -2.947 -2.625
## B-Spline: phi2 -8.099 0.263 -8.614 -7.584 -6.885 0.456 -7.778 -5.991 -6.321
## B-Spline: phi3 -0.764 0.434 -1.615  0.087 -4.352 0.754 -5.830 -2.874 -4.107
## B-Spline: phi4 -5.951 0.353 -6.643 -5.258 -6.130 0.549 -7.206 -5.053 -4.618
##                   SE     LL     UL
## B-Spline: phi1 0.098 -2.818 -2.432
## B-Spline: phi2 0.323 -6.954 -5.689
## B-Spline: phi3 0.473 -5.035 -3.179
## B-Spline: phi4 0.307 -5.221 -4.016
## 
## Knots:
##       h1 h2 h3
## knot1  0  0  0
## knot2 60 60 60

## [1] "rp frail"
## 
## Analysis of independent semi-competing risks data 
## Royston-Parmar baseline hazard specification
## semi-Markov specification for h3
## Confidence level: 95%
## 
## Variance of frailties, theta:
##  theta     SE     LL     UL   test pvalue 
##  0.749  0.132  0.530  1.058 60.019  0.000 
## SE computed from SE(log(theta)) via delta method.
## Bounds formed for log(theta) and exponentiated.
## Likelihood ratio of theta=0 vs. theta>0 using mixture of chi-squareds null.
## 
## Regression coefficients:
##           beta    SE     LL     UL       z pvalue
## x1       0.242 0.039  0.165  0.319   6.157  0.000
## x2       0.610 0.049  0.515  0.706  12.509  0.000
## x3      -0.309 0.041 -0.390 -0.229  -7.549  0.000
## x1       0.428 0.063  0.304  0.551   6.780  0.000
## x2       0.736 0.073  0.594  0.879  10.103  0.000
## x3      -0.395 0.065 -0.521 -0.268  -6.108  0.000
## x1       0.707 0.059  0.591  0.823  11.934  0.000
## x2       0.892 0.073  0.749  1.035  12.245  0.000
## x3      -0.878 0.062 -1.001 -0.756 -14.055  0.000
## t1_cat2 -0.063 0.126 -0.310  0.183  -0.504  0.614
## t1_cat3  0.113 0.177 -0.233  0.459   0.640  0.522
## t1_cat4  0.235 0.326 -0.403  0.873   0.722  0.471
## 
## Note: Covariates are arranged in order of transition number, 1->3.
## 
## Analysis of independent semi-competing risks data 
## Royston-Parmar baseline hazard specification
## semi-Markov specification for h3
## Confidence level: 95%
## 
## Hazard ratios:
##         exp(beta1)    LL    UL exp(beta2)    LL    UL exp(beta3)    LL    UL
## x1           1.274 1.179 1.376      1.533 1.355 1.735      2.028 1.806 2.278
## x2           1.841 1.673 2.026      2.088 1.810 2.409      2.441 2.116 2.815
## x3           0.734 0.677 0.795      0.674 0.594 0.765      0.415 0.368 0.470
## t1_cat2         NA    NA    NA         NA    NA    NA      0.939 0.733 1.201
## t1_cat3         NA    NA    NA         NA    NA    NA      1.120 0.792 1.582
## t1_cat4         NA    NA    NA         NA    NA    NA      1.265 0.668 2.395
## 
## Variance of frailties:
##  theta     SE     LL     UL   test pvalue 
##  0.749  0.132  0.530  1.058 60.019  0.000 
## SE computed from log(theta) via delta method. Bounds exponentiated from log(theta).
## Likelihood ratio test of theta=0 vs. theta>0 using mixture of chi-squareds null.
## 
## Baseline hazard function components:
##                       h1-PM    SE     LL     UL  h2-PM    SE      LL     UL
## Royston-Parmar: phi1 -0.702 0.180 -1.055 -0.350 -1.958 0.360  -2.665 -1.252
## Royston-Parmar: phi2  2.449 0.133  2.188  2.709  2.030 0.251   1.537  2.523
## Royston-Parmar: phi3 -6.821 0.363 -7.533 -6.109 -9.701 0.690 -11.054 -8.348
## Royston-Parmar: phi4  6.296 0.289  5.730  6.863  6.264 0.491   5.302  7.226
##                       h3-PM    SE     LL     UL
## Royston-Parmar: phi1 -1.607 0.212 -2.023 -1.191
## Royston-Parmar: phi2  1.348 0.136  1.080  1.615
## Royston-Parmar: phi3 -8.636 0.439 -9.497 -7.775
## Royston-Parmar: phi4  4.777 0.247  4.292  5.261
## 
## Knots:
##           h1     h2     h3
## knot1  0.000  0.001  0.005
## knot2  0.709  1.479  3.196
## knot3  6.083  8.465 13.477
## knot4 59.457 59.677 59.627

## [1] "rp nofrail"
## 
## Analysis of independent semi-competing risks data 
## Royston-Parmar baseline hazard specification
## semi-Markov specification for h3
## Confidence level: 95%
## 
## Non-frailty model fit
## 
## Regression coefficients:
##           beta    SE     LL     UL       z pvalue
## x1       0.158 0.028  0.103  0.212   5.643      0
## x2       0.403 0.028  0.348  0.459  14.184      0
## x3      -0.206 0.028 -0.261 -0.151  -7.325      0
## x1       0.327 0.055  0.220  0.434   5.987      0
## x2       0.490 0.056  0.379  0.600   8.714      0
## x3      -0.272 0.055 -0.380 -0.164  -4.918      0
## x1       0.500 0.044  0.414  0.585  11.441      0
## x2       0.542 0.044  0.457  0.628  12.437      0
## x3      -0.640 0.044 -0.726 -0.553 -14.488      0
## t1_cat2 -0.327 0.093 -0.509 -0.145  -3.529      0
## t1_cat3 -0.570 0.107 -0.781 -0.360  -5.308      0
## t1_cat4 -1.008 0.234 -1.466 -0.550  -4.310      0
## 
## Note: Covariates are arranged in order of transition number, 1->3.
## 
## Analysis of independent semi-competing risks data 
## Royston-Parmar baseline hazard specification
## semi-Markov specification for h3
## Confidence level: 95%
## 
## Hazard ratios:
##         exp(beta1)    LL    UL exp(beta2)    LL    UL exp(beta3)    LL    UL
## x1           1.171 1.108 1.237      1.387 1.246 1.543      1.648 1.513 1.795
## x2           1.497 1.416 1.583      1.632 1.461 1.821      1.720 1.579 1.873
## x3           0.814 0.770 0.860      0.762 0.684 0.849      0.528 0.484 0.575
## t1_cat2         NA    NA    NA         NA    NA    NA      0.721 0.601 0.865
## t1_cat3         NA    NA    NA         NA    NA    NA      0.565 0.458 0.698
## t1_cat4         NA    NA    NA         NA    NA    NA      0.365 0.231 0.577
## 
## Non-frailty model fit
## 
## Baseline hazard function components:
##                       h1-PM    SE     LL     UL   h2-PM    SE      LL     UL
## Royston-Parmar: phi1 -0.568 0.163 -0.887 -0.248  -1.831 0.331  -2.479 -1.183
## Royston-Parmar: phi2  2.123 0.115  1.898  2.348   1.541 0.230   1.090  1.992
## Royston-Parmar: phi3 -7.280 0.343 -7.951 -6.608 -10.169 0.672 -11.486 -8.852
## Royston-Parmar: phi4  5.362 0.225  4.921  5.804   5.180 0.438   4.322  6.038
##                       h3-PM    SE     LL     UL
## Royston-Parmar: phi1 -1.095 0.188 -1.463 -0.726
## Royston-Parmar: phi2  1.436 0.128  1.186  1.686
## Royston-Parmar: phi3 -7.232 0.383 -7.981 -6.482
## Royston-Parmar: phi4  4.360 0.235  3.899  4.821
## 
## Knots:
##           h1     h2     h3
## knot1  0.000  0.001  0.005
## knot2  0.709  1.479  3.196
## knot3  6.083  8.465 13.477
## knot4 59.457 59.677 59.627

Baseline Plots
#specifically, we will look at semi-markov model with categorical t1
#first, create "dummy" data to predict baselines for each t1 category
t1cat_pred_mat <- as.data.frame(matrix(data = 0,nrow=4,ncol=3,dimnames = list(NULL,c("x1","x2","x3"))))
t1cat_pred_mat$t1_cat2 <- c(0,1,0,0)
t1cat_pred_mat$t1_cat3 <- c(0,0,1,0)
t1cat_pred_mat$t1_cat4 <- c(0,0,0,1)

#let's just look at frailty-based models
frail <- "frail"

for(bl in c("wb","bs","pw","rp")){
  par(mfrow=c(2,3))
  pred <- SemiCompRisksFreq:::predict.Freq_HReg2(std_freq_fit_list[[paste0(bl,"_t1cat_",frail)]], tseq=t_seq)
  for(ty in c("h","S")){
    for(i in 1:2){
      plot_mat <- pred[[paste0(ty,".",i)]][,paste0(c(ty,"LL","UL"),".",i)]
      matplot(x=t_seq, ylim = if(ty=="S") c(0,1) else NULL,
              y=plot_mat, type="l", lty=c(1,3,3), col="black",
              ylab=switch(paste0(ty,"_",i),
                          "h_1"="Cause-Specific Hazard of Non-Terminal",
                          "h_2"="Cause-Specific Hazard of Terminal",
                          "S_1"="Survivor Function of Non-Terminal",
                          "S_2"="Survivor Function of Terminal"),
              xlab=if(i==3) "Sojourn Time" else "Time",
              add = FALSE)
      if(ty=="S" & i==1){
        legend(x="bottomleft",lty=c(1,3), legend=c("Estimate","95% CI"),col="black")
      }
    }
    plot_mat <- sapply(1:NROW(t1cat_pred_mat), function(j){
      pred <- SemiCompRisksFreq:::predict.Freq_HReg2(std_freq_fit_list[[paste0(bl,"_t1cat_",frail)]],
                                                     tseq=t_seq, x3new = as.matrix(t1cat_pred_mat[j,]))
      pred[[paste0(ty,".",3)]][[paste0(ty,".",3)]]
    })
    matplot(x=t_seq, #sub=bl,
            ylim = if(ty=="S") c(0,1) else NULL,
            y=plot_mat, type="l", col = 1 + 1:NROW(t1cat_pred_mat),
            lty = ifelse(frail=="nofrail",2,1),
            ylab=switch(ty,"h"="Conditional Hazard of Delivery",
                        "S"="Conditional Survivor Function of Delivery"),
            xlab="Sojourn Time (Weeks)", add = FALSE)
  }
  par(mfrow=c(1,1))
}

Forest Plots
####function for generating labeled forest plot from ID model fit ("standard")
gen_forest_plot <- function(complete_coefs,
                            title=""){
  tabletext <- rownames(complete_coefs)
  forestplot(tabletext,
             mean = cbind(complete_coefs[,1],complete_coefs[,4],complete_coefs[,7]),
             lower = cbind(complete_coefs[,2],complete_coefs[,5],complete_coefs[,8]),
             upper = cbind(complete_coefs[,3],complete_coefs[,6],complete_coefs[,9]),
             title = title, xlog=TRUE,
             boxsize = .1,
             zero=1, xticks.digits = 1,
             xlab="Hazard Ratio (Estimate, 95% CrI)",
             legend=c("Non-Terminal", "Terminal without\nNon-Terminal",
                      "Terminal after\nNon-Terminal"),
             col= fpColors(box=three_color_cb,lines=three_color_cb),
             lty.ci = c(1, 1,1),
             lwd.ci = c(1.5,1.5,1.5),
             txt_gp = fpTxtGp(xlab=gpar(cex=1.2),ticks=gpar(cex=1.15),),
             hrzl_lines=rep(list(gpar(lwd=2, lineend="butt", col="lightgray")),
                            length(tabletext)+1)
  )
}


for(t1_ind in c("not1cat",
                "t1cat",
                NULL)){
  for(haz in c("wb",
               "pw",
               "bs",
               "rp",
               NULL)){
    for(frail in c("frail",
                   "nofrail",
                   NULL)){
      temp_coef_mat <- SemiCompRisksFreq:::summary.Freq_HReg2(std_freq_fit_list[[paste0(haz,"_",t1_ind,"_",frail)]])$HR
      temp_coef_mat_sub <- temp_coef_mat[stringr::str_detect(rownames(temp_coef_mat),
                                                    "time_to_admit",negate = TRUE),]
      print(gen_forest_plot(complete_coefs = temp_coef_mat_sub))
    }
  }
}

####Prediction Plots

#used to plot predictions
library(dplyr)
library(tidyr)

get_sample_plot <- function(pred_selected, t_cutoff_vec, i,
                            plot_type = "stacked"){
  # browser()
  #create long-format data to plot
  plot_frame <- cbind(Time=t_cutoff_vec,
                      as.data.frame(pred_selected)) %>%
    pivot_longer(cols=starts_with("p_"), names_to = "Outcome", values_to = "Probability")

  plot_frame_factor <- plot_frame %>%
    mutate(Outcome = recode_factor(Outcome,
                                   "p_neither"="Pregnant without\nPreeclampsia",
                                   "p_tonly"="Delivered without\nPreeclampsia",
                                   "p_both"="Delivered with\nPreeclampsia",
                                   "p_ntonly"="Pregnant with\nPreeclampsia"))

  color_temp <- four_color_cb

  gg <- ggplot(plot_frame_factor, aes(x=Time, y=Probability))
  if(plot_type == "line"){
    gg <- gg + geom_line(aes(colour=Outcome)) #+
#      guides(colour = guide_legend(reverse = T))
  } else{
   gg <- gg + geom_area(aes(colour=Outcome, fill=Outcome)) +
     scale_fill_manual(values = color_temp) #+
#     guides(colour = guide_legend(reverse = T),fill = guide_legend(reverse = T))
  }
  gg <- gg +
    scale_color_manual(values = color_temp) +
    labs(tag = LETTERS[i]) +
    theme_classic() + theme(text=element_text(size=14))
}

#https://stackoverflow.com/questions/39011020/ggplot-align-plots-together-and-add-common-labels-and-legend
#extract legend
get_legend <- function(myggplot){
  tmp <- ggplot_gtable(ggplot_build(myggplot))
  leg <- which(sapply(tmp$grobs, function(x) x$name) == "guide-box")
  legend <- tmp$grobs[[leg]]
  return(legend)
}
















#### PREDICTION PLOTS ####
#same set of covariates for every transition (except categorical), so this same matrix used for all inputs
test_xtemp <- as.matrix(scrData[,c("x1","x2","x3")])

#### Sample Predictions ####

#now, to create four sample patients 
cov_mat <- cbind("x1"=c(0,1,0,0),
                 "x2"=c(0,0,1,0),
                 "x3"=c(0,0,0,1))

#first, generate predictions for every sample subject across time, and corresponding plots
std_pred_c_selected_list <- list()
std_pred_m_selected_list <- list()
for(t1_ind in c("not1cat","t1cat")){
  for(frail_ind in c("frail",#"nofrail",
                     NULL)){
    print(paste0(t1_ind,"_",frail_ind))
    temp_para <- std_freq_fit_list[[paste0("wb_",t1_ind,"_",frail_ind)]]$estimate
    pred_c_selected <- calc_risk(para = temp_para,
                          Xmat1 = cov_mat, Xmat2 = cov_mat,
                          Xmat3 = cov_mat,
                          frailty = TRUE,  #say frailty=TRUE even if it's not doesn't matter in this case
                          model = "semi-markov",
                          type = "conditional", gamma = 1,
                          hazard = "weibull",
                          t_cutoff = t_seq, tol = 1e-3,
                          h3_tv = if(t1_ind == "t1cat") "piecewise" else "none",
                          h3tv_knots = if(t1_ind == "t1cat") c(0,.7,4.4,27) else NULL)
    # apply(pred_c_selected,MARGIN = c(3,1),FUN = sum)
    std_pred_c_selected_list[[paste0(t1_ind,"_",frail_ind)]] <- pred_c_selected

    if(frail_ind == "frail"){
      #just testing to see if it worked for larger frailty variances
      # temp_para2 <- temp_para
      # temp_para2[7] <- 0.5
      pred_m_selected <- calc_risk(para = temp_para,
                            Xmat1 = cov_mat, Xmat2 = cov_mat,
                            Xmat3 = cov_mat,
                            frailty = TRUE,  #say frailty=TRUE even if it's not doesn't matter in this case
                            model = "semi-markov", #n_quad=15,
                            type = "marginal", hazard = "weibull",
                            t_cutoff = t_seq, tol = 1e-3,
                            h3_tv = if(t1_ind == "t1cat") "piecewise" else "none",
                            h3tv_knots = if(t1_ind == "t1cat") c(0,.7,4.4,27) else NULL)
      std_pred_m_selected_list[[paste0(t1_ind,"_",frail_ind)]] <- pred_m_selected
    }
  }
}
## [1] "not1cat_frail"
## [1] "t1cat_frail"
#next, generate plots for the sample subjects
std_profile_plot_list <- list()
for(t1_ind in c("not1cat","t1cat",
                NULL)){
  for(frail_ind in c("frail",#"nofrail",
                     NULL)){
    temp_list <- list()
    #generate four plots
    for(i in 1:4){
      temp_list[[i]] <-
        get_sample_plot(pred_selected = std_pred_c_selected_list[[paste0(t1_ind,"_",frail_ind)]][,,paste0("i",i)],
                        t_cutoff_vec = t_seq, i=i)
    }
    #get one horizontal legend for everyone and then strip them of legends and labels
    gg_legend_horizontal <- get_legend(temp_list[[1]] + theme(legend.position = "bottom",legend.title = element_blank()))
    for(i in 1:4){
      temp_list[[i]] <- temp_list[[i]] + theme(legend.position="none",axis.title=element_blank())
    }
    #make 2 by 2 plot grid without legend or labels
    std_profile_plot_list[[paste0(t1_ind,"_",frail_ind)]] <-
      cowplot::plot_grid(temp_list[[1]], temp_list[[2]],
                         temp_list[[3]], temp_list[[4]],ncol =2, align="v")
  }
}

#finally, plot the final subject sample plots
# cairo_pdf(file = paste0(figurepath,"sampleplots_std_freq_2023-03-10.pdf"),
#           width=9,height=6,onefile = TRUE)
for(t1_ind in c(#"not1cat",
                "t1cat",
                NULL)){
  for(frail_ind in c("frail",#"nofrail",
                     NULL)){
    gridExtra::grid.arrange(
      gridExtra::arrangeGrob(std_profile_plot_list[[paste0(t1_ind,"_",frail_ind)]],
                             bottom=grid::textGrob(label= "Time",
                                                   gp= gpar(fontsize=14,col="black")),
                             left=grid::textGrob(label="Probability of Having Experienced Outcome",
                                                 rot=90, gp= gpar(fontsize=14,col="black"))),
      gg_legend_horizontal,heights=c(6,1))
  }
}

#### Gamma-stratified Sample Predictions ####
std_pred_c_strat_gamma_list <- list()
std_gamma_strat_plot_list <- list()
std_gamma_strat_plot_wide_list <- list()
for(t1_ind in c("not1cat","t1cat")){
  print(paste0(t1_ind,"_frail"))
  gamma_vec2 <- exp(c(-0.75,-0.25,0.25,0.75))
  temp_para <- std_freq_fit_list[[paste0("wb_",t1_ind,"_",frail_ind)]]$estimate
  std_pred_c_strat_gamma_list[[paste0(t1_ind,"_frail")]] <- list()
  for(i in 1:length(gamma_vec2)){
    pred_c_strat_gamma <- calc_risk(para = temp_para,
             Xmat1 = cov_mat[4,,drop=FALSE], Xmat2 = cov_mat[4,,drop=FALSE],
             Xmat3 = cov_mat[4,,drop=FALSE],
             frailty = TRUE,  #say frailty=TRUE even if it's not doesn't matter in this case
             model = "semi-markov",
             type = "conditional", gamma = gamma_vec2[i],
             hazard = "weibull",
             t_cutoff = t_seq, tol = 1e-3,
             h3_tv = if(t1_ind == "t1cat") "piecewise" else "none",
             h3tv_knots = if(t1_ind == "t1cat") c(0,.7,4.4,27) else NULL)

    temp_list[[i]] <-
      get_sample_plot(pred_selected = pred_c_strat_gamma,t_cutoff_vec = t_seq,
              i = NULL,plot_type = "stacked") +
      theme(legend.position="none",axis.title=element_blank()) +
      labs(subtitle = paste0("log \U03B3=",log(gamma_vec2[i])))
    std_pred_c_strat_gamma_list[[paste0(t1_ind,"_frail")]][[i]] <- pred_c_strat_gamma
  }
  std_gamma_strat_plot_list[[paste0(t1_ind,"_frail")]] <-
    cowplot::plot_grid(temp_list[[1]], temp_list[[2]],
                       temp_list[[3]], temp_list[[4]],ncol =2, align="v")
  std_gamma_strat_plot_wide_list[[paste0(t1_ind,"_frail")]] <-
    cowplot::plot_grid(temp_list[[1]], temp_list[[2]],
                       temp_list[[3]], temp_list[[4]],ncol=4, align="v")
}
## [1] "not1cat_frail"
## [1] "t1cat_frail"
#finally, plot the final sample plots
# cairo_pdf(file = paste0(figurepath,"gammastratplots_std_freq_2023-10-10.pdf"),
#           width=9,height=6,onefile = TRUE)
for(t1_ind in c("not1cat","t1cat")){

  gridExtra::grid.arrange(
    gridExtra::arrangeGrob(std_gamma_strat_plot_list[[paste0(t1_ind,"_frail")]],
                           bottom=grid::textGrob(label= "Time",
                                                 gp= gpar(fontsize=14,col="black")),
                           left=grid::textGrob(label="Probability of Having Experienced Outcome",
                                               rot=90, gp= gpar(fontsize=14,col="black"))),
    gg_legend_horizontal,heights=c(6,1))
}

#### Week 28 Conditional Plots ####
t_cutoff_vec_term <- seq(from=1,to=60,by = 0.1)
cov_mat_28 <- cbind(cov_mat,
                    t1_cat2=c(1,1,1,1),
                    t1_cat3=c(0,0,0,0),
                    t1_cat4=c(0,0,0,0))
#make a matrix of plots
std_term_plot_list <- list()
for(t1_ind in c("not1cat","t1cat")){
  for(frail_ind in c("frail",#"nofrail",
                     NULL)){
    print(paste0(t1_ind,"_",frail_ind))
    # std_term_plot_list[[paste0(t1_ind,"_",frail_ind)]] <-
    #   matrix(nrow=length(t_cutoff_vec_term),ncol=4,dimnames = list(NULL,LETTERS[1:4]))
      std_term_plot_list[[paste0(t1_ind,"_",frail_ind)]] <-
        t(calc_risk_term(para = temp_para,
                       Xmat3 = cov_mat,
                       hazard = "weibull", frailty=TRUE,
                       t_cutoff = t_cutoff_vec_term, t_start=0.8,
                       # type = if(frail_ind=="frail") "marginal" else "conditional",
                       type = "conditional",
                       gamma = 1, tol = 1e-3,
                       h3_tv = if(t1_ind=="t1cat") "piecewise" else "none",
                       h3tv_knots =  c(0,.7,4.4,27))) #h3 effect of t1 was at 28, 32, 34, and 37
      colnames(std_term_plot_list[[paste0(t1_ind,"_",frail_ind)]]) <- LETTERS[1:4]
  }
}
## [1] "not1cat_frail"
## [1] "t1cat_frail"
#note, this generates plots out of order from other loops because it's easier
for(frail_ind in c("frail",#"nofrail",
                   NULL)){
  for(t1_ind in c("not1cat","t1cat")){
    print(paste0(t1_ind,"_",frail_ind))

    matplot(x=t_cutoff_vec_term,
            y=1-std_term_plot_list[[paste0(t1_ind,"_",frail_ind)]],
            type="l", lty=1, lwd=2,
            col = four_color_qual,
            ylim = c(0,1), axes=FALSE,
            ylab="Cumulative Probability of Terminal after Non-terminal at Time 1",
            xlab="Gestational Age (Weeks)", add = FALSE )
    axis(side = 1, at = c(0,20,40,60))
    axis(side = 2, at = c(0,0.25,0.5,0.75,1))
    legend(x="topleft",legend = LETTERS[1:4],fill = four_color_qual,
           title = "Individual")
  }
}
## [1] "not1cat_frail"

## [1] "t1cat_frail"